主要用SQL的指令先將我們所需要的資料從資料庫中取出,再用R將資料整理成我們要的形式。
(通常和伺服器密碼相同)
Servers>>PostgreSQL 14>>Databases>>CIQ_Target>>Schemas>>public>>Tables
View/Edit Data>>All Rows可察看完整資料表Query toolselect * from table_name -- *代表選擇這張表格的所有欄位
select column1, column2, column3 from table_name -- 也可以選擇其中多個欄位
select * from ciqgvkeyiid
F5在這個使用說明書中只會介紹可能會用到或較常用的指令
select distinctselect完全一樣,只是就會選取到欄位中不重複的資料select distinct column1, column2, column3 from table_name
select distinct * from ciqgvkeyiid
asas也可以不寫select column1 as new_column1 from table_name -- 欄的名稱
select column1 new_column1 from table_name -- 欄的名稱
select column1 from table_name as new_table_name -- 表的名稱
select column1 from table_name new_table_name -- 表的名稱
select
symbolid sid,
gvkey GVK,
iid as id,
relatedcompanyid comid,
exchangeid exid,
objectid obid,
symbolstartdate sdate,
symbolenddate edate,
activeflag aflag
from ciqgvkeyiid
whereselect * from table_name
where condition1
select * from ciqgvkeyiid
where iid = '01'
order byselect * from table_name
order by column1 ASC -- 從小到大
select * from table_name
order by column1 DESC -- 從大到小
select * from table_name
order by column1 ASC, column2 ASC -- 先排column1y在排column2
select * from ciqgvkeyiid
order by gvkey ASC
select * from ciqgvkeyiid
order by gvkey DESC
joinselect table_name1.column1, table_name2.column2
from table_name1
join table_name2 on condition1
select table_name1.column1, table_name2.column2
from table_name1
join table_name2 on condition1 and condition2 -- 可加不只一個條件
select table_name1.column1, table_name2.column2, table_name3.column3
from table_name1
join table_name2 on condition1
join table_name3 on condition2 -- 可加不只一張表格
select ciqgvkeyiid.gvkey,
spratingidentifier.relatedcompanyid
from ciqgvkeyiid
join spratingidentifier on spratingidentifier.relatedcompanyid = ciqgvkeyiid.relatedcompanyid
想抓出的資料有:
select distinct
GVK.gvkey,
RD.ratingsymbol as splticrm,
(date_trunc('MONTH', date(RD.ratingdate)) + INTERVAL '1 MONTH - 1 day')::DATE as datadate,
ELD.datavalue as country
from spratingdata as RD
join spEntityLevelData as ELD on RD.entitySymbolvalue = ELD.entitySymbolvalue and ratingDataItemId = 21 --21是國家代碼可抓出所有國家
join spratingidentifier RI on RI.symbolvalue = RD.entitysymbolvalue and symboltypeid = 73 -- CompanyID
join ciqgvkeyiid as GVK on RI.relatedcompanyid = GVK.relatedcompanyid
where ratingdate between '1990-1-1' and '2022-7-31' and ELD.datavalue='USA' and RD.ratingTypeCode='FCLONG'
order by GVK.gvkey ASC, datadate ASC
# read the rating data
sprating = read.csv('spratingData.csv')
sprating$gvkey = as.character(sprating$gvkey)# change the data type into character from integer
sprating$datadate = as.Date(sprating$datadate)# change the data type into the date from character
# take all of the gvkey
gvkey = unique(sprating$gvkey)
# build a function to transform the data
FillMisMonthData = function(df){
gvkeyid = df$gvkey[1]
library(dplyr)
# construct a matrix with all of the ends of the month between "1990-01-01" and "2022-07-31"
ts <- seq(as.Date("1990-02-01"),length=391,by="months")-1
df_time <- data.frame(datadate=ts)
data_eotm <- suppressMessages(full_join(df_time,df))
# fill the same data like gvkey and country
data_eotm$gvkey = gvkeyid
data_eotm$country = 'USA'
library(tidyr)
# fill the rating data with the last seen
data_eotm = data_eotm %>% fill(splticrm, .direction = 'down')
# switch the order in a data frame
data_eotm_fill = select(data_eotm, gvkey, splticrm, datadate, country)
return(data_eotm_fill)
}
# use the function on all gvkey, and bind together
sprating_fillmismonth = c()
library(progress)
pb <- progress_bar$new(total = length(gvkey))
for(id in gvkey){
sprating_gvkey = sprating[sprating$gvkey==id,]
sprating_gvkey = FillMisMonthData(sprating_gvkey)
sprating_fillmismonth = rbind(sprating_fillmismonth, sprating_gvkey)
pb$tick()
Sys.sleep(1/length(gvkey))
}
# save the file to txt or csv
write.table(sprating_fillmismonth, 'D:/spGlobal/sprating_EndOfTheMonth.txt', na = "", row.names=FALSE, quote = F, sep = "\t")